# NOTE(review): clears every object from the global environment so the script
# starts from a clean slate; it does not detach packages or reset options.
rm(list=ls())
# Relative paths used later in this script (e.g. the output file handed to
# ggsave) resolve against this directory.
# NOTE(review): the path is machine-specific; the script fails here if this
# Dropbox folder is missing.
setwd('~/Dropbox/Farm Subsidies and Committees/PB R&R/revision_regressions/')

library(ggplot2)
library(dplyr)
library(magrittr)
library(stringr)
library(haven)
library(statar)
library(gridExtra)


## load collapsed dataset
## ------------------------------------------------------------------------- ##

# exclusion list: customer codes that must be dropped from the sample
org_ids <- read_dta('~/Dropbox/Farm Subsidies and Committees/analyses/org_ids.dta')

# main dataset: keep rows with election_year_desc >= 2009 and any_neg == 0,
# then remove every row whose customer_code appears in org_ids
collapsed_df <- read_dta('~/Dropbox/Farm Subsidies and Committees/analyses/collapsed.dta') %>%
    filter(election_year_desc >= 2009, any_neg == 0) %>%
    anti_join(org_ids, by = 'customer_code')


## create datasets
## ------------------------------------------------------------------------- ##

# one row per customer_code holding both the mean and the sum of amt_all
# over all of that customer's rows
per_customer <- collapsed_df %>%
    group_by(customer_code) %>%
    summarise(avg = mean(amt_all), total = sum(amt_all)) %>%
    ungroup()

# mean across years for each customer_code
avg_df <- per_customer %>%
    select(customer_code, amt = avg) %>%
    mutate(whichdata = 'Mean across years')

# total across years for each customer_code
total_df <- per_customer %>%
    select(customer_code, amt = total) %>%
    mutate(whichdata = 'Sum across years')

# raw rows, labelled by their election year so they stack with the summaries
yearly_df <- collapsed_df %>%
    mutate(whichdata = as.character(election_year_desc)) %>%
    select(amt = amt_all, whichdata)

# stack everything into one long dataset keyed by whichdata
# (customer_code is NA for the yearly rows, which do not carry it)
work_df <- bind_rows(avg_df, total_df, yearly_df)

# drop the intermediates and the large inputs to free memory
rm(per_customer, avg_df, total_df, yearly_df, collapsed_df, org_ids)


## plot!
## ------------------------------------------------------------------------- ##

# http://www.sthda.com/english/wiki/ggplot2-violin-plot-quick-start-guide-r-software-and-data-visualization

# Only strictly positive amounts can be shown on a log scale. The filtered
# data is stored under its own name and passed to ggplot() explicitly, so the
# filter actually applies to what is drawn (previously the plot was built from
# the unfiltered work_df and the filter result was thrown away).
plot_df <- work_df %>%
    filter(amt > 0)

# tick positions at powers of ten, expressed on the natural-log scale used
# for the y aesthetic
log_breaks <- log(10^(0:6))
log_labels <- c('1', '10', '100', '1000', '10000', '100000', '1000000')

# one violin (with a narrow boxplot overlaid) per value of whichdata;
# coord_flip() puts the groups on the vertical axis
violin_plot <- ggplot(plot_df, aes(y = log(amt), x = whichdata)) +
    geom_violin() +
    geom_boxplot(width = 0.1) +
    coord_flip() +
    labs(y = 'Payment amount (log scale)',
         x = '') +
    scale_y_continuous(breaks = log_breaks,
                       labels = log_labels) +
    theme_minimal() +
    theme(text = element_text(size = 14),
          plot.title = element_text(hjust = 0.5),
          plot.caption = element_text(hjust = 0))

# display the plot, as the original top-level expression did
print(violin_plot)

# save the plot object explicitly instead of relying on last_plot()
ggsave('violin.pdf', plot = violin_plot, width = 8, height = 5)
